{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This example shows how to scroll through a website using AgentQL with the [Google Colaboratory](https://colab.research.google.com/) environment."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install the [AgentQL](https://pypi.org/project/agentql/) library."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install agentql"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install the Playwright dependency required by AgentQL."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!playwright install chromium"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
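    "You can [store](https://medium.com/@parthdasawant/how-to-use-secrets-in-google-colab-450c38e3ec75) your AgentQL API key in Google Colab's secrets. The next cell reads the `AGENTQL_API_KEY` secret and exposes it as an environment variable."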
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from google.colab import userdata\n",
    "\n",
    "# Copy the Colab secret into the environment variable of the same name.\n",
    "os.environ[\"AGENTQL_API_KEY\"] = userdata.get(\"AGENTQL_API_KEY\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the AgentQL script. Unfortunately, you can't see the browser window directly in Google Colab like you would on your local machine. However, you can still interact with the browser and take a screen recording of the session to see what's happening.\n",
    "\n",
    "Please note that an online environment like Google Colab supports the **asynchronous version** of AgentQL."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import random\n",
    "import asyncio\n",
    "\n",
    "import agentql\n",
    "from playwright.async_api import async_playwright\n",
    "from agentql.ext.playwright.async_api import Page\n",
    "from IPython.display import HTML\n",
    "from base64 import b64encode\n",
    "\n",
    "# Logger used by the rest of this cell; basicConfig requests DEBUG-level output.\n",
    "log = logging.getLogger(__name__)\n",
    "logging.basicConfig(level=logging.DEBUG)\n",
    "\n",
    "\n",
    "async def key_press_end_scroll(page: Page):\n",
    "    \"\"\"Press the End key on the given page to scroll to its bottom.\"\"\"\n",
    "    keyboard = page.keyboard\n",
    "    await keyboard.press(\"End\")\n",
    "\n",
    "\n",
    "async def mouse_wheel_scroll(page: Page):\n",
    "    \"\"\"Scroll down with the mouse wheel, one viewport at a time, to the page bottom.\n",
    "\n",
    "    The viewport height, total document height, and current scroll offset are\n",
    "    read once up front, so content that loads while scrolling is not included.\n",
    "    A short random pause follows each wheel event.\n",
    "\n",
    "    Args:\n",
    "        page: The page to scroll.\n",
    "    \"\"\"\n",
    "    viewport_height, total_height, scroll_position = await page.evaluate(\n",
    "        \"() => [window.innerHeight, document.body.scrollHeight, window.scrollY]\"\n",
    "    )\n",
    "    if viewport_height <= 0:\n",
    "        # A zero-height viewport would never advance the loop below.\n",
    "        return\n",
    "    while scroll_position < total_height:\n",
    "        scroll_position += viewport_height\n",
    "        await page.mouse.wheel(delta_x=0, delta_y=viewport_height)\n",
    "        # Must be awaited: a bare asyncio.sleep() call only creates a coroutine and never pauses.\n",
    "        await asyncio.sleep(random.uniform(0.05, 0.1))\n",
    "\n",
    "\n",
    "# AgentQL query: asks for a page title and a list of post headers.\n",
    "QUERY = \"\"\"\n",
    "{\n",
    "    page_title\n",
    "    post_headers[]\n",
    "}\n",
    "\"\"\"\n",
    "\n",
    "async with async_playwright() as playwright, await playwright.chromium.launch() as browser:\n",
    "\n",
    "    # Set up the video recording\n",
    "    video_dir = os.path.abspath(\"videos\")\n",
    "    context = await browser.new_context(\n",
    "        record_video_dir=video_dir,\n",
    "        record_video_size={\"width\": 1280, \"height\": 720},\n",
    "    )\n",
    "    try:\n",
    "        page = await agentql.wrap_async(await context.new_page())\n",
    "\n",
    "        log.info(\"Navigating to the page...\")\n",
    "\n",
    "        await page.goto(\"https://infinite-scroll.com/demo/full-page/\")\n",
    "\n",
    "        await page.wait_for_page_ready_state()\n",
    "\n",
    "        num_extra_pages_to_load = 3\n",
    "\n",
    "        for times in range(num_extra_pages_to_load):\n",
    "            log.info(f\"Scrolling to the bottom of the page... (num_times = {times+1})\")\n",
    "            await key_press_end_scroll(page)\n",
    "            await page.wait_for_page_ready_state()\n",
    "            log.info(\"Content loaded!\")\n",
    "\n",
    "        log.info(\"Issuing AgentQL data query...\")\n",
    "        response = await page.query_data(QUERY)\n",
    "\n",
    "        log.info(f\"AgentQL response: {response}\")\n",
    "    finally:\n",
    "        # Playwright only guarantees the recording is fully written to disk\n",
    "        # once the browser context has been closed, so close it before reading.\n",
    "        await context.close()\n",
    "\n",
    "    # Display the video\n",
    "    video_files = [\n",
    "        os.path.join(video_dir, name)\n",
    "        for name in os.listdir(video_dir)\n",
    "        if name.endswith('.webm')\n",
    "    ]\n",
    "    if video_files:\n",
    "        # Pick the newest recording so a stale file from an earlier run is not shown.\n",
    "        video_path = max(video_files, key=os.path.getmtime)\n",
    "        with open(video_path, 'rb') as f:\n",
    "            video_bytes = f.read()\n",
    "\n",
    "        video_b64 = b64encode(video_bytes).decode('utf-8')\n",
    "        video_html = f\"\"\"\n",
    "        <video width=\"800\" controls>\n",
    "          <source src=\"data:video/webm;base64,{video_b64}\" type=\"video/webm\">\n",
    "          Your browser does not support the video tag.\n",
    "        </video>\n",
    "        \"\"\"\n",
    "        display(HTML(video_html))\n",
    "    else:\n",
    "        print(\"No video file was created\")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
